home *** CD-ROM | disk | FTP | other *** search
- Path: xanth!mcnc!uvaarpa!haven!ames!sun-barr!newstop!sun!swap!page
- From: page%swap@Sun.COM (Bob Page)
- Newsgroups: comp.sources.amiga
- Subject: v89i218: rcs - revision control system, Part03/14
- Message-ID: <128094@sun.Eng.Sun.COM>
- Date: 19 Nov 89 09:24:26 GMT
- Sender: news@sun.Eng.Sun.COM
- Lines: 2839
- Approved: page@sun.com
-
- Submitted-by: rsbx@cbmvax.commodore.com (Raymond S. Brand)
- Posting-number: Volume 89, Issue 218
- Archive-name: unix/rcs.03
-
- # This is a shell archive.
- # Remove anything above and including the cut line.
- # Then run the rest of the file through 'sh'.
- # Unpacked files will be owned by you and have default permissions.
- #----cut here-----cut here-----cut here-----cut here----#
- #!/bin/sh
- # shar: SHell ARchive
- # Run the following text through 'sh' to create:
- # diff/dir.c
- # diff/ed.c
- # diff/io.c
- # diff/normal.c
- # diff/regex.c
- # This is archive 3 of a 14-part kit.
- # This archive created: Sun Nov 19 01:12:05 1989
- if `test ! -d diff`
- then
- mkdir diff
- echo "mkdir diff"
- fi
- echo "extracting diff/dir.c"
- sed 's/^X//' << \SHAR_EOF > diff/dir.c
- X/* Read, sort and compare two directories. Used for GNU DIFF.
- X Copyright (C) 1988, 1989 Free Software Foundation, Inc.
- X
- XThis file is part of GNU DIFF.
- X
- XGNU DIFF is free software; you can redistribute it and/or modify
- Xit under the terms of the GNU General Public License as published by
- Xthe Free Software Foundation; either version 1, or (at your option)
- Xany later version.
- X
- XGNU DIFF is distributed in the hope that it will be useful,
- Xbut WITHOUT ANY WARRANTY; without even the implied warranty of
- XMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- XGNU General Public License for more details.
- X
- XYou should have received a copy of the GNU General Public License
- Xalong with GNU DIFF; see the file COPYING. If not, write to
- Xthe Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
- X
- X#include "diff.h"
- X
- Xstatic int compare_names ();
- X
- X/* Read the directory named DIRNAME and return a sorted vector
- X of filenames for its contents. NONEX nonzero means this directory is
- X known to be nonexistent, so return zero files. */
- X
- Xstruct dirdata
- X{
- X int length; /* # elements in `files' */
- X char **files; /* Sorted names of files in the dir */
- X};
- X
- Xstatic struct dirdata
- Xdir_sort (dirname, nonex)
- X char *dirname;
- X int nonex;
- X{
- X register DIR *reading;
- X register struct direct *next;
- X struct dirdata dirdata;
- X int compare_names ();
- X
- X /* Address of block containing the files that are described. */
- X char **files;
- X
- X /* Length of block that `files' points to, measured in files. */
- X int nfiles;
- X
- X /* Index of first unused in `files'. */
- X int files_index;
- X
- X if (nonex)
- X {
- X dirdata.length = 0;
- X dirdata.files = 0;
- X return dirdata;
- X }
- X
- X /* Open the directory and check for errors. */
- X reading = opendir (dirname);
- X if (!reading)
- X {
- X perror_with_name (dirname);
- X dirdata.length = -1;
- X return dirdata;
- X }
- X
- X /* Initialize the table of filenames. */
- X
- X nfiles = 100;
- X files = (char **) xmalloc (nfiles * sizeof (char *));
- X files_index = 0;
- X
- X /* Read the directory entries, and insert the subfiles
- X into the `files' table. */
- X
- X while (next = readdir (reading))
- X {
- X /* Ignore the files `.' and `..' */
- X if (next->d_name[0] == '.'
- X && (next->d_name[1] == 0
- X || (next->d_name[1] == '.'
- X && next->d_name[2] == 0)))
- X continue;
- X
- X if (files_index == nfiles)
- X {
- X nfiles *= 2;
- X files
- X = (char **) xrealloc (files, sizeof (char *) * nfiles);
- X }
- X files[files_index++] = concat (next->d_name, "", "");
- X }
- X
- X closedir (reading);
- X
- X /* Sort the table. */
- X qsort (files, files_index, sizeof (char *), compare_names);
- X
- X /* Return a description of location and length of the table. */
- X dirdata.files = files;
- X dirdata.length = files_index;
- X
- X return dirdata;
- X}
- X
- X/* Sort the files now in the table. */
- X
- Xstatic int
- Xcompare_names (file1, file2)
- X char **file1, **file2;
- X{
- X return strcmp (*file1, *file2);
- X}
- X
- X/* Compare the contents of two directories named NAME1 and NAME2.
- X This is a top-level routine; it does everything necessary for diff
- X on two directories.
- X
- X NONEX1 nonzero says directory NAME1 doesn't exist, but pretend it is
- X empty. Likewise NONEX2.
- X
- X HANDLE_FILE is a caller-provided subroutine called to handle each file.
- X It gets five operands: dir and name (rel to original working dir) of file
- X in dir 1, dir and name pathname of file in dir 2, and the recursion depth.
- X
- X For a file that appears in only one of the dirs, one of the name-args
- X to HANDLE_FILE is zero.
- X
- X DEPTH is the current depth in recursion.
- X
- X Returns the maximum of all the values returned by HANDLE_FILE,
- X or 2 if trouble is encountered in opening files. */
- X
- Xint
- Xdiff_dirs (name1, name2, handle_file, depth, nonex1, nonex2)
- X char *name1, *name2;
- X int (*handle_file) ();
- X int nonex1, nonex2;
- X{
- X struct dirdata data1, data2;
- X register int i1, i2;
- X int val = 0;
- X int v1;
- X
- X /* Get sorted contents of both dirs. */
- X data1 = dir_sort (name1, nonex1);
- X data2 = dir_sort (name2, nonex2);
- X if (data1.length == -1 || data2.length == -1)
- X {
- X if (data1.length >= 0)
- X free (data1.files);
- X if (data2.length >= 0)
- X free (data2.files);
- X return 2;
- X }
- X
- X i1 = 0;
- X i2 = 0;
- X
- X /* If -Sname was specified, and this is the topmost level of comparison,
- X ignore all file names less than the specified starting name. */
- X
- X if (dir_start_file && depth == 0)
- X {
- X while (i1 < data1.length && strcmp (data1.files[i1], dir_start_file) < 0)
- X i1++;
- X while (i2 < data2.length && strcmp (data2.files[i2], dir_start_file) < 0)
- X i2++;
- X }
- X
- X /* Loop while files remain in one or both dirs. */
- X while (i1 < data1.length || i2 < data2.length)
- X {
- X int nameorder;
- X
- X /* Compare next name in dir 1 with next name in dir 2.
- X At the end of a dir,
- X pretend the "next name" in that dir is very large. */
- X
- X if (i1 == data1.length)
- X nameorder = 1;
- X else if (i2 == data2.length)
- X nameorder = -1;
- X else
- X nameorder = strcmp (data1.files[i1], data2.files[i2]);
- X
- X if (nameorder == 0)
- X {
- X /* We have found a file (or subdir) in common between both dirs.
- X Compare the two files. */
- X v1 = (*handle_file) (name1, data1.files[i1], name2, data2.files[i2],
- X depth + 1);
- X i1++, i2++;
- X }
- X if (nameorder < 0)
- X {
- X /* Next filename in dir 1 is less; that is a file in dir 1 only. */
- X v1 = (*handle_file) (name1, data1.files[i1], name2, 0, depth + 1);
- X i1++;
- X }
- X if (nameorder > 0)
- X {
- X /* Next filename in dir 2 is less; that is a file in dir 2 only. */
- X v1 = (*handle_file) (name1, 0, name2, data2.files[i2], depth + 1);
- X i2++;
- X }
- X if (v1 > val)
- X val = v1;
- X }
- X free (data1.files);
- X free (data2.files);
- X
- X return val;
- X}
- SHAR_EOF
- echo "extracting diff/ed.c"
- sed 's/^X//' << \SHAR_EOF > diff/ed.c
- X/* Output routines for ed-script format.
- X Copyright (C) 1988, 1989 Free Software Foundation, Inc.
- X
- XThis file is part of GNU DIFF.
- X
- XGNU DIFF is free software; you can redistribute it and/or modify
- Xit under the terms of the GNU General Public License as published by
- Xthe Free Software Foundation; either version 1, or (at your option)
- Xany later version.
- X
- XGNU DIFF is distributed in the hope that it will be useful,
- Xbut WITHOUT ANY WARRANTY; without even the implied warranty of
- XMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- XGNU General Public License for more details.
- X
- XYou should have received a copy of the GNU General Public License
- Xalong with GNU DIFF; see the file COPYING. If not, write to
- Xthe Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
- X
- X#include "diff.h"
- X
- Xstatic void print_rcs_hunk ();
- Xstatic void print_ed_hunk ();
- Xstatic void pr_forward_ed_hunk ();
- Xstatic void print_range_length ();
- Xvoid translate_range ();
- Xstruct change *find_change ();
- Xstruct change *find_reverse_change ();
- X
- X/* Print our script as ed commands. */
- X
- Xvoid
- Xprint_ed_script (script)
- X struct change *script;
- X{
- X print_script (script, find_reverse_change, print_ed_hunk);
- X}
- X
- X/* Print a hunk of an ed diff */
- X
- Xstatic void
- Xprint_ed_hunk (hunk)
- X struct change *hunk;
- X{
- X int f0, l0, f1, l1;
- X int deletes, inserts;
- X
- X#if 0
- X hunk = flip_script (hunk);
- X#endif
- X#ifdef MYDEBUG
- X debug_script (hunk);
- X#endif
- X
- X /* Determine range of line numbers involved in each file. */
- X analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
- X if (!deletes && !inserts)
- X return;
- X
- X /* Print out the line number header for this hunk */
- X print_number_range (',', &files[0], f0, l0);
- X fprintf (outfile, "%c\n", change_letter (inserts, deletes));
- X
- X /* Print new/changed lines from second file, if needed */
- X if (inserts)
- X {
- X int i;
- X int inserting = 1;
- X for (i = f1; i <= l1; i++)
- X {
- X /* Resume the insert, if we stopped. */
- X if (! inserting)
- X fprintf (outfile, "%da\n",
- X i - f1 + translate_line_number (&files[0], f0) - 1);
- X inserting = 1;
- X
- X /* If the file's line is just a dot, it would confuse `ed'.
- X So output it with a double dot, and set the flag LEADING_DOT
- X so that we will output another ed-command later
- X to change the double dot into a single dot. */
- X
- X if (files[1].linbuf[i].text[0] == '.'
- X && files[1].linbuf[i].text[1] == '\n')
- X {
- X fprintf (outfile, "..\n");
- X fprintf (outfile, ".\n");
- X /* Now change that double dot to the desired single dot. */
- X fprintf (outfile, "%ds/^\\.\\././\n",
- X i - f1 + translate_line_number (&files[0], f0));
- X inserting = 0;
- X }
- X else
- X /* Line is not `.', so output it unmodified. */
- X print_1_line ("", &files[1].linbuf[i]);
- X }
- X
- X /* End insert mode, if we are still in it. */
- X if (inserting)
- X fprintf (outfile, ".\n");
- X }
- X}
- X
- X/* Print change script in the style of ed commands,
- X but print the changes in the order they appear in the input files,
- X which means that the commands are not truly useful with ed. */
- X
- Xvoid
- Xpr_forward_ed_script (script)
- X struct change *script;
- X{
- X print_script (script, find_change, pr_forward_ed_hunk);
- X}
- X
- Xstatic void
- Xpr_forward_ed_hunk (hunk)
- X struct change *hunk;
- X{
- X int i;
- X int f0, l0, f1, l1;
- X int deletes, inserts;
- X
- X /* Determine range of line numbers involved in each file. */
- X analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
- X if (!deletes && !inserts)
- X return;
- X
- X fprintf (outfile, "%c", change_letter (inserts, deletes));
- X print_number_range (' ', files, f0, l0);
- X fprintf (outfile, "\n");
- X
- X /* If deletion only, print just the number range. */
- X
- X if (!inserts)
- X return;
- X
- X /* For insertion (with or without deletion), print the number range
- X and the lines from file 2. */
- X
- X for (i = f1; i <= l1; i++)
- X print_1_line ("", &files[1].linbuf[i]);
- X
- X fprintf (outfile, ".\n");
- X}
- X
- X/* Print in a format somewhat like ed commands
- X except that each insert command states the number of lines it inserts.
- X This format is used for RCS. */
- X
- Xvoid
- Xprint_rcs_script (script)
- X struct change *script;
- X{
- X print_script (script, find_change, print_rcs_hunk);
- X}
- X
- X/* Print a hunk of an RCS diff */
- X
- Xstatic void
- Xprint_rcs_hunk (hunk)
- X struct change *hunk;
- X{
- X int i;
- X int f0, l0, f1, l1;
- X int deletes, inserts;
- X int tf0, tl0, tf1, tl1;
- X
- X /* Determine range of line numbers involved in each file. */
- X analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
- X if (!deletes && !inserts)
- X return;
- X
- X translate_range (&files[0], f0, l0, &tf0, &tl0);
- X
- X if (deletes)
- X {
- X fprintf (outfile, "d");
- X /* For deletion, print just the starting line number from file 0
- X and the number of lines deleted. */
- X fprintf (outfile, "%d %d\n",
- X tf0,
- X (tl0 >= tf0 ? tl0 - tf0 + 1 : 1));
- X }
- X
- X if (inserts)
- X {
- X fprintf (outfile, "a");
- X
- X /* Take last-line-number from file 0 and # lines from file 1. */
- X translate_range (&files[1], f1, l1, &tf1, &tl1);
- X fprintf (outfile, "%d %d\n",
- X tl0,
- X (tl1 >= tf1 ? tl1 - tf1 + 1 : 1));
- X
- X /* Print the inserted lines. */
- X for (i = f1; i <= l1; i++)
- X print_1_line ("", &files[1].linbuf[i]);
- X }
- X}
- SHAR_EOF
- echo "extracting diff/io.c"
- sed 's/^X//' << \SHAR_EOF > diff/io.c
- X/* File I/O for GNU DIFF.
- X Copyright (C) 1988, 1989 Free Software Foundation, Inc.
- X
- XThis file is part of GNU DIFF.
- X
- XGNU DIFF is free software; you can redistribute it and/or modify
- Xit under the terms of the GNU General Public License as published by
- Xthe Free Software Foundation; either version 1, or (at your option)
- Xany later version.
- X
- XGNU DIFF is distributed in the hope that it will be useful,
- Xbut WITHOUT ANY WARRANTY; without even the implied warranty of
- XMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- XGNU General Public License for more details.
- X
- XYou should have received a copy of the GNU General Public License
- Xalong with GNU DIFF; see the file COPYING. If not, write to
- Xthe Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
- X
- X#include "diff.h"
- X
- X/* Rotate a value n bits to the left. */
- X#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
- X#define ROL(v, n) ((v) << (n) | (v) >> UINT_BIT - (n))
- X
- X/* Given a hash value and a new character, return a new hash value. */
- X#define HASH(h, c) ((c) + ROL (h, 7))
- X
- X/* Current file under consideration. */
- Xstruct file_data *current;
- X
- X/* Check for binary files and compare them for exact identity. */
- X
- X/* Return 1 if BUF contains a character with the 0200 bit set.
- X SIZE is the number of characters in BUF. */
- X
- Xstatic int
- Xbinary_file_p (buf, size)
- X char *buf;
- X int size;
- X{
- X while (--size >= 0)
- X if (*buf++ & 0200)
- X return 1;
- X return 0;
- X}
- X
- Xint binary_file_threshold = 512;
- X
- X/* Slurp the current file completely into core.
- X Return nonzero if it appears to be a binary file. */
- X
- Xstatic int
- Xslurp ()
- X{
- X /* If we have a nonexistent file at this stage, treat it as empty. */
- X if (current->desc < 0)
- X {
- X current->bufsize = 0;
- X current->buffered_chars = 0;
- X current->buffer = 0;
- X }
- X /* If it's a regular file, we can just get the size out of the stat
- X block and slurp it in all at once. */
- X /* In all cases, we leave room in the buffer for 2 extra chars
- X beyond those that current->bufsize describes:
- X one for a newline (in case the text does not end with one)
- X and one for a sentinel in find_identical_ends. */
- X#ifdef AMIGA
- X else if (current->stat.st_type < 0)
- X#else
- X else if ((current->stat.st_mode & S_IFMT) == S_IFREG)
- X#endif
- X {
- X current->bufsize = current->stat.st_size;
- X current->buffer = (char *) xmalloc (current->bufsize + 2);
- X current->buffered_chars
- X = read (current->desc, current->buffer, current->bufsize);
- X if (current->buffered_chars < 0)
- X pfatal_with_name (current->name);
- X }
- X else
- X {
- X int cc;
- X
- X current->bufsize = 4096;
- X current->buffer = (char *) xmalloc (current->bufsize + 2);
- X current->buffered_chars = 0;
- X
- X /* Not a regular file; read it in a little at a time, growing the
- X buffer as necessary. */
- X while ((cc = read (current->desc,
- X current->buffer + current->buffered_chars,
- X current->bufsize - current->buffered_chars))
- X > 0)
- X {
- X current->buffered_chars += cc;
- X if (current->buffered_chars == current->bufsize)
- X {
- X current->bufsize = current->bufsize * 2;
- X current->buffer = (char *) xrealloc (current->buffer,
- X current->bufsize + 2);
- X }
- X }
- X if (cc < 0)
- X pfatal_with_name (current->name);
- X }
- X
- X /* Check first part of file to see if it's a binary file. */
- X if (! always_text_flag
- X && binary_file_p (current->buffer,
- X min (current->buffered_chars, binary_file_threshold)))
- X return 1;
- X
- X /* If not binary, make sure text ends in a newline,
- X but remember that we had to add one. */
- X if (current->buffered_chars > 0
- X && current->buffer[current->buffered_chars - 1] != '\n')
- X {
- X current->missing_newline = 1;
- X current->buffer[current->buffered_chars++] = '\n';
- X }
- X else
- X current->missing_newline = 0;
- X
- X return 0;
- X}
- X
- X/* Split the file into lines, simultaneously computing the hash codes for
- X each line. */
- X
- Xvoid
- Xfind_and_hash_each_line ()
- X{
- X unsigned h;
- X int i;
- X unsigned char *p = (unsigned char *) current->prefix_end, *ip, c;
- X
- X /* Attempt to get a good initial guess as to the number of lines. */
- X current->linbufsize = current->buffered_chars / 50 + 5;
- X current->linbuf
- X = (struct line_def *) xmalloc (current->linbufsize * sizeof (struct line_def));
- X
- X if (function_regexp)
- X {
- X /* If the -C or -F option is used, we need to find the lines
- X of the matching prefix. At least we will need to find the last few,
- X but since we don't know how many, it's easiest to find them all. */
- X current->buffered_lines = 0;
- X p = (unsigned char *) current->buffer;
- X }
- X else
- X {
- X /* Skip the identical prefixes, except be prepared to handle context.
- X In fact, handle 1 more preceding line than the context says,
- X in case shift_boundaries moves things backwards in this file. */
- X current->buffered_lines = current->prefix_lines - context - 1;
- X if (current->buffered_lines < 0)
- X current->buffered_lines = 0;
- X for (i = 0; i < context + 1; ++i)
- X /* Unless we are at the beginning, */
- X if ((char *) p != current->buffer)
- X /* Back up at least 1 char until at the start of a line. */
- X while ((char *) --p != current->buffer && p[-1] != '\n')
- X ;
- X }
- X
- X while ((char *) p < current->suffix_begin)
- X {
- X h = 0;
- X ip = p;
- X
- X if (current->prefix_end <= (char *) p)
- X {
- X /* Hash this line until we find a newline. */
- X if (ignore_case_flag)
- X {
- X if (ignore_all_space_flag)
- X while ((c = *p) != '\n')
- X {
- X if (! isspace (c))
- X if (isupper (c))
- X h = HASH (h, tolower (c));
- X else
- X h = HASH (h, c);
- X ++p;
- X }
- X else if (ignore_space_change_flag)
- X
- X while ((c = *p) != '\n')
- X {
- X if (c == ' ' || c == '\t')
- X {
- X while ((c = *p) == ' ' || c == '\t')
- X ++p;
- X if (c == '\n')
- X break;
- X h = HASH (h, ' ');
- X }
- X else if (isupper (c))
- X h = HASH (h, tolower (c));
- X else
- X h = HASH (h, c);
- X ++p;
- X }
- X else
- X while ((c = *p) != '\n')
- X {
- X if (isupper (c))
- X h = HASH (h, tolower (c));
- X else
- X h = HASH (h, c);
- X ++p;
- X }
- X }
- X else
- X {
- X if (ignore_all_space_flag)
- X while ((c = *p) != '\n')
- X {
- X if (! isspace (c))
- X h = HASH (h, c);
- X ++p;
- X }
- X else if (ignore_space_change_flag)
- X while ((c = *p) != '\n')
- X {
- X if (c == ' ' || c == '\t')
- X {
- X while ((c = *p) == ' ' || c == '\t')
- X ++p;
- X if (c == '\n')
- X break;
- X h = HASH (h, ' ');
- X }
- X else
- X h = HASH (h, c);
- X ++p;
- X }
- X else
- X while ((c = *p) != '\n')
- X {
- X h = HASH (h, c);
- X ++p;
- X }
- X }
- X }
- X else
- X /* This line is part of the matching prefix,
- X so we don't need to hash it. */
- X while (*p != '\n')
- X ++p;
- X
- X /* Maybe increase the size of the line table. */
- X if (current->buffered_lines >= current->linbufsize)
- X {
- X while (current->buffered_lines >= current->linbufsize)
- X current->linbufsize *= 2;
- X current->linbuf
- X = (struct line_def *) xrealloc (current->linbuf,
- X current->linbufsize
- X * sizeof (struct line_def));
- X }
- X current->linbuf[current->buffered_lines].text = (char *) ip;
- X current->linbuf[current->buffered_lines].length = p - ip;
- X current->linbuf[current->buffered_lines].hash = h;
- X ++current->buffered_lines;
- X ++p;
- X }
- X
- X i = 0;
- X while (i < context && (char *) p < current->buffer + current->buffered_chars)
- X {
- X ip = p;
- X while (*p++ != '\n')
- X ;
- X /* Maybe increase the size of the line table. */
- X if (current->buffered_lines >= current->linbufsize)
- X {
- X while (current->buffered_lines >= current->linbufsize)
- X current->linbufsize *= 2;
- X current->linbuf
- X = (struct line_def *) xrealloc (current->linbuf,
- X current->linbufsize
- X * sizeof (struct line_def));
- X }
- X current->linbuf[current->buffered_lines].text = (char *) ip;
- X current->linbuf[current->buffered_lines].length = p - ip - 1;
- X current->linbuf[current->buffered_lines].hash = 0;
- X ++current->buffered_lines;
- X ++i;
- X }
- X}
- X
- X/* Given a vector of two file_data objects, find the identical
- X prefixes and suffixes of each object. */
- X
- Xstatic void
- Xfind_identical_ends (filevec)
- X struct file_data filevec[];
- X{
- X char *p0, *p1, *end0, *beg0;
- X int lines;
- X
- X if (filevec[0].buffered_chars == 0 || filevec[1].buffered_chars == 0)
- X {
- X filevec[0].prefix_end = filevec[0].buffer;
- X filevec[1].prefix_end = filevec[1].buffer;
- X filevec[0].prefix_lines = filevec[1].prefix_lines = 0;
- X filevec[0].suffix_begin = filevec[0].buffer + filevec[0].buffered_chars;
- X filevec[1].suffix_begin = filevec[1].buffer + filevec[1].buffered_chars;
- X filevec[0].suffix_lines = filevec[1].suffix_lines = 0;
- X return;
- X }
- X
- X /* Find identical prefix. */
- X
- X p0 = filevec[0].buffer;
- X p1 = filevec[1].buffer;
- X lines = 0;
- X
- X /* Insert end "sentinels", in this case characters that are guarranteed
- X to make the equality test false, and thus terminate the loop. */
- X
- X if (filevec[0].buffered_chars < filevec[1].buffered_chars)
- X p0[filevec[0].buffered_chars] = ~p1[filevec[0].buffered_chars];
- X else
- X p1[filevec[1].buffered_chars] = ~p0[filevec[1].buffered_chars];
- X
- X /* Loop until first mismatch, or to the sentinel characters. */
- X while (1)
- X {
- X char c = *p0++;
- X if (c != *p1++)
- X break;
- X if (c == '\n')
- X ++lines;
- X }
- X
- X /* If the sentinel was passed, and lengths are equal, the
- X files are identical. */
- X if (p0 - filevec[0].buffer > filevec[0].buffered_chars
- X && filevec[0].buffered_chars == filevec[1].buffered_chars)
- X {
- X filevec[0].prefix_end = p0 - 1;
- X filevec[1].prefix_end = p1 - 1;
- X filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
- X filevec[0].suffix_begin = filevec[0].buffer;
- X filevec[1].suffix_begin = filevec[1].buffer;
- X filevec[0].suffix_lines = filevec[1].suffix_lines = lines;
- X return;
- X }
- X
- X /* Point at first nonmatching characters. */
- X --p0, --p1;
- X
- X /* Skip back to last line-beginning in the prefix. */
- X while (p0 != filevec[0].buffer && p0[-1] != '\n')
- X --p0, --p1;
- X
- X /* Record the prefix. */
- X filevec[0].prefix_end = p0;
- X filevec[1].prefix_end = p1;
- X filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
- X
- X /* Find identical suffix. */
- X
- X /* P0 and P1 point beyond the last chars not yet compared. */
- X p0 = filevec[0].buffer + filevec[0].buffered_chars;
- X p1 = filevec[1].buffer + filevec[1].buffered_chars;
- X lines = 0;
- X end0 = p0; /* Addr of last char in file 0. */
- X
- X /* Get value of P0 at which we should stop scanning backward:
- X this is when either P0 or P1 points at the last char
- X of the identical prefix. */
- X if (filevec[0].buffered_chars < filevec[1].buffered_chars)
- X beg0 = filevec[0].prefix_end - 1;
- X else
- X beg0 = (filevec[0].prefix_end - 1
- X + filevec[1].buffered_chars - filevec[0].buffered_chars);
- X
- X /* Scan back until chars don't match or we reach that point. */
- X while (p0 != beg0)
- X {
- X char c = *--p0;
- X if (c != *--p1)
- X break;
- X if (c == '\n')
- X ++lines;
- X }
- X
- X /* Make P0 and P1 point at the first char of the matching suffix. */
- X ++p0, ++p1;
- X
- X /* Advance to next place that is a line-beginning in both files. */
- X while (p0 != end0
- X && !((p0 == filevec[0].buffer || p0[-1] == '\n')
- X &&
- X (p1 == filevec[1].buffer || p1[-1] == '\n')))
- X ++p0, ++p1;
- X
- X /* Subtract one, since the last newline isn't followed by a line. */
- X --lines;
- X
- X /* Record the suffix. */
- X filevec[0].suffix_begin = p0;
- X filevec[1].suffix_begin = p1;
- X filevec[0].suffix_lines = filevec[1].suffix_lines = lines;
- X}
- X
- X/* Lines are put into equivalence classes (of lines that match in line_cmp).
- X Each equivalence class is represented by one of these structures,
- X but only while the classes are being computed.
- X Afterward, each class is represented by a number. */
- Xstruct equivclass
- X{
- X struct equivclass *next; /* Next item in this bucket. */
- X struct line_def line; /* A line that fits this class. */
- X};
- X
- X/* Hash-table: array of buckets, each being a chain of equivalence classes. */
- Xstatic struct equivclass **buckets;
- X
- X/* Size of the bucket array. */
- Xstatic int nbuckets;
- X
- X/* Array in which the equivalence classes are allocated.
- X The bucket-chains go through the elements in this array.
- X The number of an equivalence class is its index in this array. */
- Xstatic struct equivclass *equivs;
- X
- X/* Index of first free element in the array `equivs'. */
- Xstatic int equivs_index;
- X
- X/* Size allocated to the array `equivs'. */
- Xstatic int equivs_alloc;
- X
- X/* Largest primes less than some power of two, for nbuckets. Values range
- X from useful to preposterous. If one of these numbers isn't prime
- X after all, don't blame it on me, blame it on primes (6) . . . */
- Xstatic int primes[] =
- X{
- X 509,
- X 1021,
- X 2039,
- X 4093,
- X 8191,
- X 16381,
- X 32749,
- X 65521,
- X 131071,
- X 262139,
- X 524287,
- X 1048573,
- X 2097143,
- X 4194301,
- X 8388593,
- X 16777213,
- X 33554393,
- X 67108859, /* Preposterously large . . . */
- X -1
- X};
- X
- X/* Index of current nbuckets in primes. */
- Xstatic int primes_index;
- X
- X/* Find the equiv class associated with line N of the current file. */
- X
- Xstatic int
- Xfind_equiv_class (n)
- X int n;
- X{
- X int bucket = current->linbuf[n].hash % nbuckets;
- X struct equivclass *b = buckets[bucket], *p = NULL;
- X
- X /* Equivalence class 0 is permanently allocated to lines that were
- X not hashed because they were parts of identical prefixes or
- X suffixes. */
- X if (n < current->prefix_lines
- X || current->linbuf[n].text >= current->suffix_begin)
- X return 0;
- X
- X /* Check through the appropriate bucket to see if there isn't already
- X an equivalence class for this line. */
- X while (b)
- X {
- X if (b->line.hash == current->linbuf[n].hash
- X && (b->line.length == current->linbuf[n].length
- X /* Lines of different lengths can match with certain options. */
- X || length_varies)
- X && !line_cmp (&b->line, ¤t->linbuf[n]))
- X return b - equivs;
- X p = b, b = b->next;
- X }
- X
- X /* Create a new equivalence class in this bucket. */
- X
- X p = &equivs[equivs_index++];
- X p->next = buckets[bucket];
- X buckets[bucket] = p;
- X p->line = current->linbuf[n];
- X
- X return equivs_index - 1;
- X}
- X
- X/* Given a vector of two file_data objects, read the file associated
- X with each one, and build the table of equivalence classes.
- X Return nonzero if either file appears to be a binary file. */
- X
- Xint
- Xread_files (filevec)
- X struct file_data filevec[];
- X{
- X int i, j;
- X int binary = 0;
- X int this_binary;
- X
- X current = &filevec[0];
- X binary = this_binary = slurp ();
- X
- X current = &filevec[1];
- X this_binary = slurp ();
- X if (binary || this_binary)
- X return 1;
- X
- X find_identical_ends (filevec);
- X
- X for (i = 0; i < 2; ++i)
- X {
- X current = &filevec[i];
- X find_and_hash_each_line ();
- X }
- X
- X /* This is guaranteed to be enough space. */
- X equivs_alloc = filevec[0].buffered_lines + filevec[1].buffered_lines + 1;
- X equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
- X /* Equivalence class 0 is permanently safe for lines that were not
- X hashed. Real equivalence classes start at 1. */
- X equivs_index = 1;
- X
- X primes_index = 0;
- X while (primes[primes_index] < equivs_alloc / 3)
- X primes_index++;
- X
- X buckets = (struct equivclass **) xmalloc (primes[primes_index] * sizeof (struct equivclass *));
- X bzero (buckets, primes[primes_index] * sizeof (struct equivclass *));
- X nbuckets = primes[primes_index];
- X
- X for (i = 0; i < 2; ++i)
- X {
- X current = &filevec[i];
- X current->equivs
- X = (int *) xmalloc (current->buffered_lines * sizeof (int));
- X for (j = 0; j < current->buffered_lines; ++j)
- X current->equivs[j] = find_equiv_class (j);
- X }
- X
- X filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
- X
- X free (equivs);
- X free (buckets);
- X
- X return 0;
- X}
- SHAR_EOF
- echo "extracting diff/normal.c"
- sed 's/^X//' << \SHAR_EOF > diff/normal.c
- X/* Normal-format output routines for GNU DIFF.
- X Copyright (C) 1988, 1989 Free Software Foundation, Inc.
- X
- XThis file is part of GNU DIFF.
- X
- XGNU DIFF is free software; you can redistribute it and/or modify
- Xit under the terms of the GNU General Public License as published by
- Xthe Free Software Foundation; either version 1, or (at your option)
- Xany later version.
- X
- XGNU DIFF is distributed in the hope that it will be useful,
- Xbut WITHOUT ANY WARRANTY; without even the implied warranty of
- XMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- XGNU General Public License for more details.
- X
- XYou should have received a copy of the GNU General Public License
- Xalong with GNU DIFF; see the file COPYING. If not, write to
- Xthe Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
- X
- X
- X#include "diff.h"
- X
- Xvoid print_normal_hunk ();
- Xvoid print_number_range ();
- Xstruct change *find_change ();
- X
- X/* Print the edit-script SCRIPT as a normal diff.
- X INF points to an array of descriptions of the two files. */
- X
- Xvoid
- Xprint_normal_script (script)
- X struct change *script;
- X{
- X print_script (script, find_change, print_normal_hunk);
- X}
- X
- X/* Print a hunk of a normal diff.
- X This is a contiguous portion of a complete edit script,
- X describing changes in consecutive lines. */
- X
- Xvoid
- Xprint_normal_hunk (hunk)
- X struct change *hunk;
- X{
- X int first0, last0, first1, last1, deletes, inserts;
- X register int i;
- X
- X /* Determine range of line numbers involved in each file. */
- X analyze_hunk (hunk, &first0, &last0, &first1, &last1, &deletes, &inserts);
- X if (!deletes && !inserts)
- X return;
- X
- X /* Print out the line number header for this hunk */
- X print_number_range (',', &files[0], first0, last0);
- X fprintf (outfile, "%c", change_letter (inserts, deletes));
- X print_number_range (',', &files[1], first1, last1);
- X fprintf (outfile, "\n");
- X
- X /* Print the lines that the first file has. */
- X if (deletes)
- X for (i = first0; i <= last0; i++)
- X print_1_line ("<", &files[0].linbuf[i]);
- X
- X if (inserts && deletes)
- X fprintf (outfile, "---\n");
- X
- X /* Print the lines that the second file has. */
- X if (inserts)
- X for (i = first1; i <= last1; i++)
- X print_1_line (">", &files[1].linbuf[i]);
- X}
- SHAR_EOF
- echo "extracting diff/regex.c"
- sed 's/^X//' << \SHAR_EOF > diff/regex.c
- X/* Extended regular expression matching and search library.
- X Copyright (C) 1985, 1989 Free Software Foundation, Inc.
- X
- X This program is free software; you can redistribute it and/or modify
- X it under the terms of the GNU General Public License as published by
- X the Free Software Foundation; either version 1, or (at your option)
- X any later version.
- X
- X This program is distributed in the hope that it will be useful,
- X but WITHOUT ANY WARRANTY; without even the implied warranty of
- X MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- X GNU General Public License for more details.
- X
- X You should have received a copy of the GNU General Public License
- X along with this program; if not, write to the Free Software
- X Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- X
- X
- X In other words, you are welcome to use, share and improve this program.
- X You are forbidden to forbid anyone else to use, share and improve
- X what you give them. Help stamp out software-hoarding! */
- X
- X
- X/* To test, compile with -Dtest.
- X This Dtestable feature turns this into a self-contained program
- X which reads a pattern, describes how it compiles,
- X then reads a string and searches for it. */
- X
- X#ifdef AMIGA
- X#define bcmp(a,b,l) memcmp(a,b,l)
- X#define bcopy(f,t,l) movmem(f,t,l)
- X#define bzero(a,l) memset(a,'0',l)
- X#define alloca(n) malloc(n)
- X#endif
- X
- X#ifdef emacs
- X
- X/* The `emacs' switch turns on certain special matching commands
- X that make sense only in emacs. */
- X
- X#include "config.h"
- X#include "lisp.h"
- X#include "buffer.h"
- X#include "syntax.h"
- X
- X#else /* not emacs */
- X
- X#ifdef USG
- X#ifndef BSTRING
- X#define bcopy(s,d,n) memcpy((d),(s),(n))
- X#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
- X#define bzero(s,n) memset((s),0,(n))
- X#endif
- X#endif
- X
- X/* Make alloca work the best possible way. */
- X#ifdef __GNUC__
- X#define alloca __builtin_alloca
- X#else
- X#ifdef sparc
- X#include <alloca.h>
- X#endif
- X#endif
- X
- X/*
- X * Define the syntax stuff, so we can do the \<...\> things.
- X */
- X
- X#ifndef Sword /* must be non-zero in some of the tests below... */
- X#define Sword 1
- X#endif
- X
- X#define SYNTAX(c) re_syntax_table[c]
- X
- X#ifdef SYNTAX_TABLE
- X
- Xchar *re_syntax_table;
- X
- X#else
- X
- Xstatic char re_syntax_table[256];
- X
- Xstatic void
- Xinit_syntax_once ()
- X{
- X register int c;
- X static int done = 0;
- X
- X if (done)
- X return;
- X
- X bzero (re_syntax_table, sizeof re_syntax_table);
- X
- X for (c = 'a'; c <= 'z'; c++)
- X re_syntax_table[c] = Sword;
- X
- X for (c = 'A'; c <= 'Z'; c++)
- X re_syntax_table[c] = Sword;
- X
- X for (c = '0'; c <= '9'; c++)
- X re_syntax_table[c] = Sword;
- X
- X done = 1;
- X}
- X
- X#endif /* SYNTAX_TABLE */
- X#endif /* not emacs */
- X
- X#include "regex.h"
- X
- X/* Number of failure points to allocate space for initially,
- X when matching. If this number is exceeded, more space is allocated,
- X so it is not a hard limit. */
- X
- X#ifndef NFAILURES
- X#define NFAILURES 80
- X#endif /* NFAILURES */
- X
- X/* width of a byte in bits */
- X
- X#define BYTEWIDTH 8
- X
- X#ifndef SIGN_EXTEND_CHAR
- X#define SIGN_EXTEND_CHAR(x) (x)
- X#endif
- X
- Xstatic int obscure_syntax = 0;
- X
- X/* Specify the precise syntax of regexp for compilation.
- X This provides for compatibility for various utilities
- X which historically have different, incompatible syntaxes.
- X
- X The argument SYNTAX is a bit-mask containing the two bits
- X RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
- X
- Xint
- Xre_set_syntax (syntax)
- X{
- X int ret;
- X
- X ret = obscure_syntax;
- X obscure_syntax = syntax;
- X return ret;
- X}
- X
- X/* re_compile_pattern takes a regular-expression string
- X and converts it into a buffer full of byte commands for matching.
- X
- X PATTERN is the address of the pattern string
- X SIZE is the length of it.
- X BUFP is a struct re_pattern_buffer * which points to the info
- X on where to store the byte commands.
- X This structure contains a char * which points to the
- X actual space, which should have been obtained with malloc.
- X re_compile_pattern may use realloc to grow the buffer space.
- X
- X The number of bytes of commands can be found out by looking in
- X the struct re_pattern_buffer that bufp pointed to,
- X after re_compile_pattern returns.
- X*/
- X
- X#define PATPUSH(ch) (*b++ = (char) (ch))
- X
- X#define PATFETCH(c) \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; \
- X if (translate) c = translate[c]; }
- X
- X#define PATFETCH_RAW(c) \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; }
- X
- X#define PATUNFETCH p--
- X
- X#define EXTEND_BUFFER \
- X { char *old_buffer = bufp->buffer; \
- X if (bufp->allocated == (1<<16)) goto too_big; \
- X bufp->allocated *= 2; \
- X if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
- X if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
- X goto memory_exhausted; \
- X c = bufp->buffer - old_buffer; \
- X b += c; \
- X if (fixup_jump) \
- X fixup_jump += c; \
- X if (laststart) \
- X laststart += c; \
- X begalt += c; \
- X if (pending_exact) \
- X pending_exact += c; \
- X }
- X
- Xstatic int store_jump (), insert_jump ();
- X
- Xchar *
- Xre_compile_pattern (pattern, size, bufp)
- X char *pattern;
- X int size;
- X struct re_pattern_buffer *bufp;
- X{
- X register char *b = bufp->buffer;
- X register char *p = pattern;
- X char *pend = pattern + size;
- X register unsigned c, c1;
- X char *p1;
- X unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X /* address of the count-byte of the most recently inserted "exactn" command.
- X This makes it possible to tell whether a new exact-match character
- X can be added to that command or requires a new "exactn" command. */
- X
- X char *pending_exact = 0;
- X
- X /* address of the place where a forward-jump should go
- X to the end of the containing expression.
- X Each alternative of an "or", except the last, ends with a forward-jump
- X of this sort. */
- X
- X char *fixup_jump = 0;
- X
- X /* address of start of the most recently finished expression.
- X This tells postfix * where to find the start of its operand. */
- X
- X char *laststart = 0;
- X
- X /* In processing a repeat, 1 means zero matches is allowed */
- X
- X char zero_times_ok;
- X
- X /* In processing a repeat, 1 means many matches is allowed */
- X
- X char many_times_ok;
- X
- X /* address of beginning of regexp, or inside of last \( */
- X
- X char *begalt = b;
- X
- X /* Stack of information saved by \( and restored by \).
- X Four stack elements are pushed by each \(:
- X First, the value of b.
- X Second, the value of fixup_jump.
- X Third, the value of regnum.
- X Fourth, the value of begalt. */
- X
- X int stackb[40];
- X int *stackp = stackb;
- X int *stacke = stackb + 40;
- X int *stackt;
- X
- X /* Counts \('s as they are encountered. Remembered for the matching \),
- X where it becomes the "register number" to put in the stop_memory command */
- X
- X int regnum = 1;
- X
- X bufp->fastmap_accurate = 0;
- X
- X#ifndef emacs
- X#ifndef SYNTAX_TABLE
- X /*
- X * Initialize the syntax table.
- X */
- X init_syntax_once();
- X#endif
- X#endif
- X
- X if (bufp->allocated == 0)
- X {
- X bufp->allocated = 28;
- X if (bufp->buffer)
- X /* EXTEND_BUFFER loses when bufp->allocated is 0 */
- X bufp->buffer = (char *) realloc (bufp->buffer, 28);
- X else
- X /* Caller did not allocate a buffer. Do it for him */
- X bufp->buffer = (char *) malloc (28);
- X if (!bufp->buffer) goto memory_exhausted;
- X begalt = b = bufp->buffer;
- X }
- X
- X while (p != pend)
- X {
- X if (b - bufp->buffer > bufp->allocated - 10)
- X /* Note that EXTEND_BUFFER clobbers c */
- X EXTEND_BUFFER;
- X
- X PATFETCH (c);
- X
- X switch (c)
- X {
- X case '$':
- X if (obscure_syntax & RE_TIGHT_VBAR)
- X {
- X if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
- X goto normal_char;
- X /* Make operand of last vbar end before this `$'. */
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X fixup_jump = 0;
- X PATPUSH (endline);
- X break;
- X }
- X
- X /* $ means succeed if at end of line, but only in special contexts.
- X If randomly in the middle of a pattern, it is a normal character. */
- X if (p == pend || *p == '\n'
- X || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
- X || (obscure_syntax & RE_NO_BK_PARENS
- X ? *p == ')'
- X : *p == '\\' && p[1] == ')')
- X || (obscure_syntax & RE_NO_BK_VBAR
- X ? *p == '|'
- X : *p == '\\' && p[1] == '|'))
- X {
- X PATPUSH (endline);
- X break;
- X }
- X goto normal_char;
- X
- X case '^':
- X /* ^ means succeed if at beg of line, but only if no preceding pattern. */
- X
- X if (laststart && p[-2] != '\n'
- X && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X goto normal_char;
- X if (obscure_syntax & RE_TIGHT_VBAR)
- X {
- X if (p != pattern + 1
- X && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X goto normal_char;
- X PATPUSH (begline);
- X begalt = b;
- X }
- X else
- X PATPUSH (begline);
- X break;
- X
- X case '+':
- X case '?':
- X if (obscure_syntax & RE_BK_PLUS_QM)
- X goto normal_char;
- X handle_plus:
- X case '*':
- X /* If there is no previous pattern, char not special. */
- X if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X goto normal_char;
- X /* If there is a sequence of repetition chars,
- X collapse it down to equivalent to just one. */
- X zero_times_ok = 0;
- X many_times_ok = 0;
- X while (1)
- X {
- X zero_times_ok |= c != '+';
- X many_times_ok |= c != '?';
- X if (p == pend)
- X break;
- X PATFETCH (c);
- X if (c == '*')
- X ;
- X else if (!(obscure_syntax & RE_BK_PLUS_QM)
- X && (c == '+' || c == '?'))
- X ;
- X else if ((obscure_syntax & RE_BK_PLUS_QM)
- X && c == '\\')
- X {
- X int c1;
- X PATFETCH (c1);
- X if (!(c1 == '+' || c1 == '?'))
- X {
- X PATUNFETCH;
- X PATUNFETCH;
- X break;
- X }
- X c = c1;
- X }
- X else
- X {
- X PATUNFETCH;
- X break;
- X }
- X }
- X
- X /* Star, etc. applied to an empty pattern is equivalent
- X to an empty pattern. */
- X if (!laststart)
- X break;
- X
- X /* Now we know whether 0 matches is allowed,
- X and whether 2 or more matches is allowed. */
- X if (many_times_ok)
- X {
- X /* If more than one repetition is allowed,
- X put in a backward jump at the end. */
- X store_jump (b, maybe_finalize_jump, laststart - 3);
- X b += 3;
- X }
- X insert_jump (on_failure_jump, laststart, b + 3, b);
- X pending_exact = 0;
- X b += 3;
- X if (!zero_times_ok)
- X {
- X /* At least one repetition required: insert before the loop
- X a skip over the initial on-failure-jump instruction */
- X insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
- X b += 3;
- X }
- X break;
- X
- X case '.':
- X laststart = b;
- X PATPUSH (anychar);
- X break;
- X
- X case '[':
- X while (b - bufp->buffer
- X > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
- X /* Note that EXTEND_BUFFER clobbers c */
- X EXTEND_BUFFER;
- X
- X laststart = b;
- X if (*p == '^')
- X PATPUSH (charset_not), p++;
- X else
- X PATPUSH (charset);
- X p1 = p;
- X
- X PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
- X /* Clear the whole map */
- X bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
- X /* Read in characters and ranges, setting map bits */
- X while (1)
- X {
- X PATFETCH (c);
- X if (c == ']' && p != p1 + 1) break;
- X if (*p == '-' && p[1] != ']')
- X {
- X PATFETCH (c1);
- X PATFETCH (c1);
- X while (c <= c1)
- X b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
- X }
- X else
- X {
- X b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
- X }
- X }
- X /* Discard any bitmap bytes that are all 0 at the end of the map.
- X Decrement the map-length byte too. */
- X while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- X b[-1]--;
- X b += b[-1];
- X break;
- X
- X case '(':
- X if (! (obscure_syntax & RE_NO_BK_PARENS))
- X goto normal_char;
- X else
- X goto handle_open;
- X
- X case ')':
- X if (! (obscure_syntax & RE_NO_BK_PARENS))
- X goto normal_char;
- X else
- X goto handle_close;
- X
- X case '\n':
- X if (! (obscure_syntax & RE_NEWLINE_OR))
- X goto normal_char;
- X else
- X goto handle_bar;
- X
- X case '|':
- X if (! (obscure_syntax & RE_NO_BK_VBAR))
- X goto normal_char;
- X else
- X goto handle_bar;
- X
- X case '\\':
- X if (p == pend) goto invalid_pattern;
- X PATFETCH_RAW (c);
- X switch (c)
- X {
- X case '(':
- X if (obscure_syntax & RE_NO_BK_PARENS)
- X goto normal_backsl;
- X handle_open:
- X if (stackp == stacke) goto nesting_too_deep;
- X if (regnum < RE_NREGS)
- X {
- X PATPUSH (start_memory);
- X PATPUSH (regnum);
- X }
- X *stackp++ = b - bufp->buffer;
- X *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
- X *stackp++ = regnum++;
- X *stackp++ = begalt - bufp->buffer;
- X fixup_jump = 0;
- X laststart = 0;
- X begalt = b;
- X break;
- X
- X case ')':
- X if (obscure_syntax & RE_NO_BK_PARENS)
- X goto normal_backsl;
- X handle_close:
- X if (stackp == stackb) goto unmatched_close;
- X begalt = *--stackp + bufp->buffer;
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X if (stackp[-1] < RE_NREGS)
- X {
- X PATPUSH (stop_memory);
- X PATPUSH (stackp[-1]);
- X }
- X stackp -= 2;
- X fixup_jump = 0;
- X if (*stackp)
- X fixup_jump = *stackp + bufp->buffer - 1;
- X laststart = *--stackp + bufp->buffer;
- X break;
- X
- X case '|':
- X if (obscure_syntax & RE_NO_BK_VBAR)
- X goto normal_backsl;
- X handle_bar:
- X insert_jump (on_failure_jump, begalt, b + 6, b);
- X pending_exact = 0;
- X b += 3;
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X fixup_jump = b;
- X b += 3;
- X laststart = 0;
- X begalt = b;
- X break;
- X
- X#ifdef emacs
- X case '=':
- X PATPUSH (at_dot);
- X break;
- X
- X case 's':
- X laststart = b;
- X PATPUSH (syntaxspec);
- X PATFETCH (c);
- X PATPUSH (syntax_spec_code[c]);
- X break;
- X
- X case 'S':
- X laststart = b;
- X PATPUSH (notsyntaxspec);
- X PATFETCH (c);
- X PATPUSH (syntax_spec_code[c]);
- X break;
- X#endif /* emacs */
- X
- X case 'w':
- X laststart = b;
- X PATPUSH (wordchar);
- X break;
- X
- X case 'W':
- X laststart = b;
- X PATPUSH (notwordchar);
- X break;
- X
- X case '<':
- X PATPUSH (wordbeg);
- X break;
- X
- X case '>':
- X PATPUSH (wordend);
- X break;
- X
- X case 'b':
- X PATPUSH (wordbound);
- X break;
- X
- X case 'B':
- X PATPUSH (notwordbound);
- X break;
- X
- X case '`':
- X PATPUSH (begbuf);
- X break;
- X
- X case '\'':
- X PATPUSH (endbuf);
- X break;
- X
- X case '1':
- X case '2':
- X case '3':
- X case '4':
- X case '5':
- X case '6':
- X case '7':
- X case '8':
- X case '9':
- X c1 = c - '0';
- X if (c1 >= regnum)
- X goto normal_char;
- X for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
- X if (*stackt == c1)
- X goto normal_char;
- X laststart = b;
- X PATPUSH (duplicate);
- X PATPUSH (c1);
- X break;
- X
- X case '+':
- X case '?':
- X if (obscure_syntax & RE_BK_PLUS_QM)
- X goto handle_plus;
- X
- X default:
- X normal_backsl:
- X /* You might think it would be useful for \ to mean
- X not to translate; but if we don't translate it
- X it will never match anything. */
- X if (translate) c = translate[c];
- X goto normal_char;
- X }
- X break;
- X
- X default:
- X normal_char:
- X if (!pending_exact || pending_exact + *pending_exact + 1 != b
- X || *pending_exact == 0177 || *p == '*' || *p == '^'
- X || ((obscure_syntax & RE_BK_PLUS_QM)
- X ? *p == '\\' && (p[1] == '+' || p[1] == '?')
- X : (*p == '+' || *p == '?')))
- X {
- X laststart = b;
- X PATPUSH (exactn);
- X pending_exact = b;
- X PATPUSH (0);
- X }
- X PATPUSH (c);
- X (*pending_exact)++;
- X }
- X }
- X
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X
- X if (stackp != stackb) goto unmatched_open;
- X
- X bufp->used = b - bufp->buffer;
- X return 0;
- X
- X invalid_pattern:
- X return "Invalid regular expression";
- X
- X unmatched_open:
- X return "Unmatched \\(";
- X
- X unmatched_close:
- X return "Unmatched \\)";
- X
- X end_of_pattern:
- X return "Premature end of regular expression";
- X
- X nesting_too_deep:
- X return "Nesting too deep";
- X
- X too_big:
- X return "Regular expression too big";
- X
- X memory_exhausted:
- X return "Memory exhausted";
- X}
- X
- X/* Store where `from' points a jump operation to jump to where `to' points.
- X `opcode' is the opcode to store. */
- X
- Xstatic int
- Xstore_jump (from, opcode, to)
- X char *from, *to;
- X char opcode;
- X{
- X from[0] = opcode;
- X from[1] = (to - (from + 3)) & 0377;
- X from[2] = (to - (from + 3)) >> 8;
- X return(0);
- X}
- X
- X/* Open up space at char FROM, and insert there a jump to TO.
- X CURRENT_END gives te end of the storage no in use,
- X so we know how much data to copy up.
- X OP is the opcode of the jump to insert.
- X
- X If you call this function, you must zero out pending_exact. */
- X
- Xstatic int
- Xinsert_jump (op, from, to, current_end)
- X char op;
- X char *from, *to, *current_end;
- X{
- X register char *pto = current_end + 3;
- X register char *pfrom = current_end;
- X while (pfrom != from)
- X *--pto = *--pfrom;
- X store_jump (from, op, to);
- X return(0);
- X}
- X
- X/* Given a pattern, compute a fastmap from it.
- X The fastmap records which of the (1 << BYTEWIDTH) possible characters
- X can start a string that matches the pattern.
- X This fastmap is used by re_search to skip quickly over totally implausible text.
- X
- X The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
- X as bufp->fastmap.
- X The other components of bufp describe the pattern to be used. */
- X
- Xvoid
- Xre_compile_fastmap (bufp)
- X struct re_pattern_buffer *bufp;
- X{
- X unsigned char *pattern = (unsigned char *) bufp->buffer;
- X int size = bufp->used;
- X register char *fastmap = bufp->fastmap;
- X register unsigned char *p = pattern;
- X register unsigned char *pend = pattern + size;
- X register int j;
- X unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X unsigned char *stackb[NFAILURES];
- X unsigned char **stackp = stackb;
- X
- X bzero (fastmap, (1 << BYTEWIDTH));
- X bufp->fastmap_accurate = 1;
- X bufp->can_be_null = 0;
- X
- X while (p)
- X {
- X if (p == pend)
- X {
- X bufp->can_be_null = 1;
- X break;
- X }
- X#ifdef SWITCH_ENUM_BUG
- X switch ((int) ((enum regexpcode) *p++))
- X#else
- X switch ((enum regexpcode) *p++)
- X#endif
- X {
- X case exactn:
- X if (translate)
- X fastmap[translate[p[1]]] = 1;
- X else
- X fastmap[p[1]] = 1;
- X break;
- X
- X case begline:
- X case before_dot:
- X case at_dot:
- X case after_dot:
- X case begbuf:
- X case endbuf:
- X case wordbound:
- X case notwordbound:
- X case wordbeg:
- X case wordend:
- X continue;
- X
- X case endline:
- X if (translate)
- X fastmap[translate['\n']] = 1;
- X else
- X fastmap['\n'] = 1;
- X if (bufp->can_be_null != 1)
- X bufp->can_be_null = 2;
- X break;
- X
- X case finalize_jump:
- X case maybe_finalize_jump:
- X case jump:
- X case dummy_failure_jump:
- X bufp->can_be_null = 1;
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p += j + 1; /* The 1 compensates for missing ++ above */
- X if (j > 0)
- X continue;
- X /* Jump backward reached implies we just went through
- X the body of a loop and matched nothing.
- X Opcode jumped to should be an on_failure_jump.
- X Just treat it like an ordinary jump.
- X For a * loop, it has pushed its failure point already;
- X if so, discard that as redundant. */
- X if ((enum regexpcode) *p != on_failure_jump)
- X continue;
- X p++;
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p += j + 1; /* The 1 compensates for missing ++ above */
- X if (stackp != stackb && *stackp == p)
- X stackp--;
- X continue;
- X
- X case on_failure_jump:
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p++;
- X *++stackp = p + j;
- X continue;
- X
- X case start_memory:
- X case stop_memory:
- X p++;
- X continue;
- X
- X case duplicate:
- X bufp->can_be_null = 1;
- X fastmap['\n'] = 1;
- X case anychar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (j != '\n')
- X fastmap[j] = 1;
- X if (bufp->can_be_null)
- X return;
- X /* Don't return; check the alternative paths
- X so we can set can_be_null if appropriate. */
- X break;
- X
- X case wordchar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) == Sword)
- X fastmap[j] = 1;
- X break;
- X
- X case notwordchar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) != Sword)
- X fastmap[j] = 1;
- X break;
- X
- X#ifdef emacs
- X case syntaxspec:
- X k = *p++;
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) == (enum syntaxcode) k)
- X fastmap[j] = 1;
- X break;
- X
- X case notsyntaxspec:
- X k = *p++;
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) != (enum syntaxcode) k)
- X fastmap[j] = 1;
- X break;
- X#endif /* emacs */
- X
- X case charset:
- X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
- X {
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X }
- X break;
- X
- X case charset_not:
- X /* Chars beyond end of map must be allowed */
- X for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X
- X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
- X {
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X }
- X break;
- X }
- X
- X /* Get here means we have successfully found the possible starting characters
- X of one path of the pattern. We need not follow this path any farther.
- X Instead, look at the next alternative remembered in the stack. */
- X if (stackp != stackb)
- X p = *stackp--;
- X else
- X break;
- X }
- X}
- X
- X/* Like re_search_2, below, but only one string is specified. */
- X
- Xint
- Xre_search (pbufp, string, size, startpos, range, regs)
- X struct re_pattern_buffer *pbufp;
- X char *string;
- X int size, startpos, range;
- X struct re_registers *regs;
- X{
- X return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
- X}
- X
- X/* Like re_match_2 but tries first a match starting at index STARTPOS,
- X then at STARTPOS + 1, and so on.
- X RANGE is the number of places to try before giving up.
- X If RANGE is negative, the starting positions tried are
- X STARTPOS, STARTPOS - 1, etc.
- X It is up to the caller to make sure that range is not so large
- X as to take the starting position outside of the input strings.
- X
- XThe value returned is the position at which the match was found,
- X or -1 if no match was found,
- X or -2 if error (such as failure stack overflow). */
- X
- Xint
- Xre_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
- X struct re_pattern_buffer *pbufp;
- X char *string1, *string2;
- X int size1, size2;
- X int startpos;
- X register int range;
- X struct re_registers *regs;
- X int mstop;
- X{
- X register char *fastmap = pbufp->fastmap;
- X register unsigned char *translate = (unsigned char *) pbufp->translate;
- X int total = size1 + size2;
- X int val;
- X
- X /* Update the fastmap now if not correct already */
- X if (fastmap && !pbufp->fastmap_accurate)
- X re_compile_fastmap (pbufp);
- X
- X /* Don't waste time in a long search for a pattern
- X that says it is anchored. */
- X if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
- X && range > 0)
- X {
- X if (startpos > 0)
- X return -1;
- X else
- X range = 1;
- X }
- X
- X while (1)
- X {
- X /* If a fastmap is supplied, skip quickly over characters
- X that cannot possibly be the start of a match.
- X Note, however, that if the pattern can possibly match
- X the null string, we must test it at each starting point
- X so that we take the first null string we get. */
- X
- X if (fastmap && startpos < total && pbufp->can_be_null != 1)
- X {
- X if (range > 0)
- X {
- X register int lim = 0;
- X register unsigned char *p;
- X int irange = range;
- X if (startpos < size1 && startpos + range >= size1)
- X lim = range - (size1 - startpos);
- X
- X p = ((unsigned char *)
- X &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
- X
- X if (translate)
- X {
- X while (range > lim && !fastmap[translate[*p++]])
- X range--;
- X }
- X else
- X {
- X while (range > lim && !fastmap[*p++])
- X range--;
- X }
- X startpos += irange - range;
- X }
- X else
- X {
- X register unsigned char c;
- X if (startpos >= size1)
- X c = string2[startpos - size1];
- X else
- X c = string1[startpos];
- X c &= 0xff;
- X if (translate ? !fastmap[translate[c]] : !fastmap[c])
- X goto advance;
- X }
- X }
- X
- X if (range >= 0 && startpos == total
- X && fastmap && pbufp->can_be_null == 0)
- X return -1;
- X
- X val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
- X if (0 <= val)
- X {
- X if (val == -2)
- X return -2;
- X return startpos;
- X }
- X
- X#ifdef C_ALLOCA
- X alloca (0);
- X#endif /* C_ALLOCA */
- X
- X advance:
- X if (!range) break;
- X if (range > 0) range--, startpos++; else range++, startpos--;
- X }
- X return -1;
- X}
- X
- X#ifndef emacs /* emacs never uses this */
- Xint
- Xre_match (pbufp, string, size, pos, regs)
- X struct re_pattern_buffer *pbufp;
- X char *string;
- X int size, pos;
- X struct re_registers *regs;
- X{
- X return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
- X}
- X#endif /* emacs */
- X
- X/* Maximum size of failure stack. Beyond this, overflow is an error. */
- X
- Xint re_max_failures = 2000;
- X
- Xstatic int bcmp_translate();
- X/* Match the pattern described by PBUFP
- X against data which is the virtual concatenation of STRING1 and STRING2.
- X SIZE1 and SIZE2 are the sizes of the two data strings.
- X Start the match at position POS.
- X Do not consider matching past the position MSTOP.
- X
- X If pbufp->fastmap is nonzero, then it had better be up to date.
- X
- X The reason that the data to match are specified as two components
- X which are to be regarded as concatenated
- X is so this function can be used directly on the contents of an Emacs buffer.
- X
- X -1 is returned if there is no match. -2 is returned if there is
- X an error (such as match stack overflow). Otherwise the value is the length
- X of the substring which was matched. */
- X
- Xint
- Xre_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
- X struct re_pattern_buffer *pbufp;
- X unsigned char *string1, *string2;
- X int size1, size2;
- X int pos;
- X struct re_registers *regs;
- X int mstop;
- X{
- X register unsigned char *p = (unsigned char *) pbufp->buffer;
- X register unsigned char *pend = p + pbufp->used;
- X /* End of first string */
- X unsigned char *end1;
- X /* End of second string */
- X unsigned char *end2;
- X /* Pointer just past last char to consider matching */
- X unsigned char *end_match_1, *end_match_2;
- X register unsigned char *d, *dend;
- X register int mcnt;
- X unsigned char *translate = (unsigned char *) pbufp->translate;
- X
- X /* Failure point stack. Each place that can handle a failure further down the line
- X pushes a failure point on this stack. It consists of two char *'s.
- X The first one pushed is where to resume scanning the pattern;
- X the second pushed is where to resume scanning the strings.
- X If the latter is zero, the failure point is a "dummy".
- X If a failure happens and the innermost failure point is dormant,
- X it discards that failure point and tries the next one. */
- X
- X unsigned char *initial_stack[2 * NFAILURES];
- X unsigned char **stackb = initial_stack;
- X unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
- X
- X /* Information on the "contents" of registers.
- X These are pointers into the input strings; they record
- X just what was matched (on this attempt) by some part of the pattern.
- X The start_memory command stores the start of a register's contents
- X and the stop_memory command stores the end.
- X
- X At that point, regstart[regnum] points to the first character in the register,
- X regend[regnum] points to the first character beyond the end of the register,
- X regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
- X and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
- X
- X unsigned char *regstart[RE_NREGS];
- X unsigned char *regend[RE_NREGS];
- X unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
- X
- X /* Set up pointers to ends of strings.
- X Don't allow the second string to be empty unless both are empty. */
- X if (!size2)
- X {
- X string2 = string1;
- X size2 = size1;
- X string1 = 0;
- X size1 = 0;
- X }
- X end1 = string1 + size1;
- X end2 = string2 + size2;
- X
- X /* Compute where to stop matching, within the two strings */
- X if (mstop <= size1)
- X {
- X end_match_1 = string1 + mstop;
- X end_match_2 = string2;
- X }
- X else
- X {
- X end_match_1 = end1;
- X end_match_2 = string2 + mstop - size1;
- X }
- X
- X /* Initialize \) text positions to -1
- X to mark ones that no \( or \) has been seen for. */
- X
- X for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
- X regend[mcnt] = (unsigned char *) -1;
- X
- X /* `p' scans through the pattern as `d' scans through the data.
- X `dend' is the end of the input string that `d' points within.
- X `d' is advanced into the following input string whenever necessary,
- X but this happens before fetching;
- X therefore, at the beginning of the loop,
- X `d' can be pointing at the end of a string,
- X but it cannot equal string2. */
- X
- X if (pos <= size1)
- X d = string1 + pos, dend = end_match_1;
- X else
- X d = string2 + pos - size1, dend = end_match_2;
- X
- X/* Write PREFETCH; just before fetching a character with *d. */
- X#define PREFETCH \
- X while (d == dend) \
- X { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
- X d = string2; /* end of string1 => advance to string2. */ \
- X dend = end_match_2; }
- X
- X /* This loop loops over pattern commands.
- X It exits by returning from the function if match is complete,
- X or it drops through if match fails at this starting point in the input data. */
- X
- X while (1)
- X {
- X if (p == pend)
- X /* End of pattern means we have succeeded! */
- X {
- X /* If caller wants register contents data back, convert it to indices */
- X if (regs)
- X {
- X regs->start[0] = pos;
- X if (dend == end_match_1)
- X regs->end[0] = d - string1;
- X else
- X regs->end[0] = d - string2 + size1;
- X for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
- X {
- X if (regend[mcnt] == (unsigned char *) -1)
- X {
- X regs->start[mcnt] = -1;
- X regs->end[mcnt] = -1;
- X continue;
- X }
- X if (regstart_seg1[mcnt])
- X regs->start[mcnt] = regstart[mcnt] - string1;
- X else
- X regs->start[mcnt] = regstart[mcnt] - string2 + size1;
- X if (regend_seg1[mcnt])
- X regs->end[mcnt] = regend[mcnt] - string1;
- X else
- X regs->end[mcnt] = regend[mcnt] - string2 + size1;
- X }
- X }
- X if (dend == end_match_1)
- X return (d - string1 - pos);
- X else
- X return d - string2 + size1 - pos;
- X }
- X
- X /* Otherwise match next pattern command */
- X#ifdef SWITCH_ENUM_BUG
- X switch ((int) ((enum regexpcode) *p++))
- X#else
- X switch ((enum regexpcode) *p++)
- X#endif
- X {
- X
- X /* \( is represented by a start_memory, \) by a stop_memory.
- X Both of those commands contain a "register number" argument.
- X The text matched within the \( and \) is recorded under that number.
- X Then, \<digit> turns into a `duplicate' command which
- X is followed by the numeric value of <digit> as the register number. */
- X
- X case start_memory:
- X regstart[*p] = d;
- X regstart_seg1[*p++] = (dend == end_match_1);
- X break;
- X
- X case stop_memory:
- X regend[*p] = d;
- X regend_seg1[*p++] = (dend == end_match_1);
- X break;
- X
- X case duplicate:
- X {
- X int regno = *p++; /* Get which register to match against */
- X register unsigned char *d2, *dend2;
- X
- X d2 = regstart[regno];
- X dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
- X ? regend[regno] : end_match_1);
- X while (1)
- X {
- X /* Advance to next segment in register contents, if necessary */
- X while (d2 == dend2)
- X {
- X if (dend2 == end_match_2) break;
- X if (dend2 == regend[regno]) break;
- X d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
- X }
- X /* At end of register contents => success */
- X if (d2 == dend2) break;
- X
- X /* Advance to next segment in data being matched, if necessary */
- X PREFETCH;
- X
- X /* mcnt gets # consecutive chars to compare */
- X mcnt = dend - d;
- X if (mcnt > dend2 - d2)
- X mcnt = dend2 - d2;
- X /* Compare that many; failure if mismatch, else skip them. */
- X if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
- X goto fail;
- X d += mcnt, d2 += mcnt;
- X }
- X }
- X break;
- X
- X case anychar:
- X /* fetch a data character */
- X PREFETCH;
- X /* Match anything but a newline. */
- X if ((translate ? translate[*d++] : *d++) == '\n')
- X goto fail;
- X break;
- X
- X case charset:
- X case charset_not:
- X {
- X /* Nonzero for charset_not */
- X int not = 0;
- X register int c;
- X if (*(p - 1) == (unsigned char) charset_not)
- X not = 1;
- X
- X /* fetch a data character */
- X PREFETCH;
- X
- X if (translate)
- X c = translate [*d];
- X else
- X c = *d;
- X
- X if (c < *p * BYTEWIDTH
- X && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X not = !not;
- X
- X p += 1 + *p;
- X
- X if (!not) goto fail;
- X d++;
- X break;
- X }
- X
- X case begline:
- X if (d == string1 || d[-1] == '\n')
- X break;
- X goto fail;
- X
- X case endline:
- X if (d == end2
- X || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
- X break;
- X goto fail;
- X
- X /* "or" constructs ("|") are handled by starting each alternative
- X with an on_failure_jump that points to the start of the next alternative.
- X Each alternative except the last ends with a jump to the joining point.
- X (Actually, each jump except for the last one really jumps
- X to the following jump, because tensioning the jumps is a hassle.) */
- X
- X /* The start of a stupid repeat has an on_failure_jump that points
- X past the end of the repeat text.
- X This makes a failure point so that, on failure to match a repetition,
- X matching restarts past as many repetitions have been found
- X with no way to fail and look for another one. */
- X
- X /* A smart repeat is similar but loops back to the on_failure_jump
- X so that each repetition makes another failure point. */
- X
- X case on_failure_jump:
- X if (stackp == stacke)
- X {
- X unsigned char **stackx;
- X if (stacke - stackb > re_max_failures * 2)
- X return -2;
- X stackx = (unsigned char **) alloca (2 * (stacke - stackb)
- X * sizeof (char *));
- X bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X stackp = stackx + (stackp - stackb);
- X stacke = stackx + 2 * (stacke - stackb);
- X stackb = stackx;
- X }
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p++;
- X *stackp++ = mcnt + p;
- X *stackp++ = d;
- X break;
- X
- X /* The end of a smart repeat has an maybe_finalize_jump back.
- X Change it either to a finalize_jump or an ordinary jump. */
- X
- X case maybe_finalize_jump:
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p++;
- X {
- X register unsigned char *p2 = p;
- X /* Compare what follows with the begining of the repeat.
- X If we can establish that there is nothing that they would
- X both match, we can change to finalize_jump */
- X while (p2 != pend
- X && (*p2 == (unsigned char) stop_memory
- X || *p2 == (unsigned char) start_memory))
- X p2++;
- X if (p2 == pend)
- X p[-3] = (unsigned char) finalize_jump;
- X else if (*p2 == (unsigned char) exactn
- X || *p2 == (unsigned char) endline)
- X {
- X register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
- X register unsigned char *p1 = p + mcnt;
- X /* p1[0] ... p1[2] are an on_failure_jump.
- X Examine what follows that */
- X if (p1[3] == (unsigned char) exactn && p1[5] != c)
- X p[-3] = (unsigned char) finalize_jump;
- X else if (p1[3] == (unsigned char) charset
- X || p1[3] == (unsigned char) charset_not)
- X {
- X int not = p1[3] == (unsigned char) charset_not;
- X if (c < p1[4] * BYTEWIDTH
- X && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X not = !not;
- X /* not is 1 if c would match */
- X /* That means it is not safe to finalize */
- X if (!not)
- X p[-3] = (unsigned char) finalize_jump;
- X }
- X }
- X }
- X p -= 2;
- X if (p[-1] != (unsigned char) finalize_jump)
- X {
- X p[-1] = (unsigned char) jump;
- X goto nofinalize;
- X }
- X
- X /* The end of a stupid repeat has a finalize-jump
- X back to the start, where another failure point will be made
- X which will point after all the repetitions found so far. */
- X
- X case finalize_jump:
- X stackp -= 2;
- X
- X case jump:
- X nofinalize:
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p += mcnt + 1; /* The 1 compensates for missing ++ above */
- X break;
- X
- X case dummy_failure_jump:
- X if (stackp == stacke)
- X {
- X unsigned char **stackx
- X = (unsigned char **) alloca (2 * (stacke - stackb)
- X * sizeof (char *));
- X bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X stackp = stackx + (stackp - stackb);
- X stacke = stackx + 2 * (stacke - stackb);
- X stackb = stackx;
- X }
- X *stackp++ = 0;
- X *stackp++ = 0;
- X goto nofinalize;
- X
- X case wordbound:
- X if (d == string1 /* Points to first char */
- X || d == end2 /* Points to end */
- X || (d == end1 && size2 == 0)) /* Points to end */
- X break;
- X if ((SYNTAX (d[-1]) == Sword)
- X != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
- X break;
- X goto fail;
- X
- X case notwordbound:
- X if (d == string1 /* Points to first char */
- X || d == end2 /* Points to end */
- X || (d == end1 && size2 == 0)) /* Points to end */
- X goto fail;
- X if ((SYNTAX (d[-1]) == Sword)
- X != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
- X goto fail;
- X break;
- X
- X case wordbeg:
- X if (d == end2 /* Points to end */
- X || (d == end1 && size2 == 0) /* Points to end */
- X || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
- X goto fail;
- X if (d == string1 /* Points to first char */
- X || SYNTAX (d[-1]) != Sword) /* prev char not letter */
- X break;
- X goto fail;
- X
- X case wordend:
- X if (d == string1 /* Points to first char */
- X || SYNTAX (d[-1]) != Sword) /* prev char not letter */
- X goto fail;
- X if (d == end2 /* Points to end */
- X || (d == end1 && size2 == 0) /* Points to end */
- X || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
- X break;
- X goto fail;
- X
- X#ifdef emacs
- X case before_dot:
- X if (((d - string2 <= (unsigned) size2)
- X ? d - bf_p2 : d - bf_p1)
- X <= point)
- X goto fail;
- X break;
- X
- X case at_dot:
- X if (((d - string2 <= (unsigned) size2)
- X ? d - bf_p2 : d - bf_p1)
- X == point)
- X goto fail;
- X break;
- X
- X case after_dot:
- X if (((d - string2 <= (unsigned) size2)
- X ? d - bf_p2 : d - bf_p1)
- X >= point)
- X goto fail;
- X break;
- X
- X case wordchar:
- X mcnt = (int) Sword;
- X goto matchsyntax;
- X
- X case syntaxspec:
- X mcnt = *p++;
- X matchsyntax:
- X PREFETCH;
- X if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
- X break;
- X
- X case notwordchar:
- X mcnt = (int) Sword;
- X goto matchnotsyntax;
- X
- X case notsyntaxspec:
- X mcnt = *p++;
- X matchnotsyntax:
- X PREFETCH;
- X if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
- X break;
- X#else
- X case wordchar:
- X PREFETCH;
- X if (SYNTAX (*d++) == 0) goto fail;
- X break;
- X
- X case notwordchar:
- X PREFETCH;
- X if (SYNTAX (*d++) != 0) goto fail;
- X break;
- X#endif /* not emacs */
- X
- X case begbuf:
- X if (d == string1) /* Note, d cannot equal string2 */
- X break; /* unless string1 == string2. */
- X goto fail;
- X
- X case endbuf:
- X if (d == end2 || (d == end1 && size2 == 0))
- X break;
- X goto fail;
- X
- X case exactn:
- X /* Match the next few pattern characters exactly.
- X mcnt is how many characters to match. */
- X mcnt = *p++;
- X if (translate)
- X {
- X do
- X {
- X PREFETCH;
- X if (translate[*d++] != *p++) goto fail;
- X }
- X while (--mcnt);
- X }
- X else
- X {
- X do
- X {
- X PREFETCH;
- X if (*d++ != *p++) goto fail;
- X }
- X while (--mcnt);
- X }
- X break;
- X }
- X continue; /* Successfully matched one pattern command; keep matching */
- X
- X /* Jump here if any matching operation fails. */
- X fail:
- X if (stackp != stackb)
- X /* A restart point is known. Restart there and pop it. */
- X {
- X if (!stackp[-2])
- X { /* If innermost failure point is dormant, flush it and keep looking */
- X stackp -= 2;
- X goto fail;
- X }
- X d = *--stackp;
- X p = *--stackp;
- X if (d >= string1 && d <= end1)
- X dend = end_match_1;
- X }
- X else break; /* Matching at this starting point really fails! */
- X }
- X return -1; /* Failure to match */
- X}
- X
- Xstatic int
- Xbcmp_translate (s1, s2, len, translate)
- X unsigned char *s1, *s2;
- X register int len;
- X unsigned char *translate;
- X{
- X register unsigned char *p1 = s1, *p2 = s2;
- X while (len)
- X {
- X if (translate [*p1++] != translate [*p2++]) return 1;
- X len--;
- X }
- X return 0;
- X}
- X
- X/* Entry points compatible with bsd4.2 regex library */
- X
- X#ifndef emacs
- X
- Xstatic struct re_pattern_buffer re_comp_buf;
- X
- Xchar *
- Xre_comp (s)
- X char *s;
- X{
- X if (!s)
- X {
- X if (!re_comp_buf.buffer)
- X return "No previous regular expression";
- X return 0;
- X }
- X
- X if (!re_comp_buf.buffer)
- X {
- X if (!(re_comp_buf.buffer = (char *) malloc (200)))
- X return "Memory exhausted";
- X re_comp_buf.allocated = 200;
- X if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
- X return "Memory exhausted";
- X }
- X return re_compile_pattern (s, strlen (s), &re_comp_buf);
- X}
- X
- Xint
- Xre_exec (s)
- X char *s;
- X{
- X int len = strlen (s);
- X return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
- X}
- X
- X#endif /* emacs */
- X
- X#ifdef test
- X
- X#include <stdio.h>
- X
- X/* Indexed by a character, gives the upper case equivalent of the character */
- X
- Xstatic char upcase[0400] =
- X { 000, 001, 002, 003, 004, 005, 006, 007,
- X 010, 011, 012, 013, 014, 015, 016, 017,
- X 020, 021, 022, 023, 024, 025, 026, 027,
- X 030, 031, 032, 033, 034, 035, 036, 037,
- X 040, 041, 042, 043, 044, 045, 046, 047,
- X 050, 051, 052, 053, 054, 055, 056, 057,
- X 060, 061, 062, 063, 064, 065, 066, 067,
- X 070, 071, 072, 073, 074, 075, 076, 077,
- X 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- X 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- X 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- X 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
- X 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- X 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- X 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- X 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
- X 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
- X 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
- X 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
- X 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
- X 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
- X 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
- X 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
- X 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
- X 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
- X 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
- X 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
- X 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
- X 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
- X 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
- X 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
- X 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
- X };
- X
- Xmain (argc, argv)
- X int argc;
- X char **argv;
- X{
- X char pat[80];
- X struct re_pattern_buffer buf;
- X int i;
- X char c;
- X char fastmap[(1 << BYTEWIDTH)];
- X
- X /* Allow a command argument to specify the style of syntax. */
- X if (argc > 1)
- X obscure_syntax = atoi (argv[1]);
- X
- X buf.allocated = 40;
- X buf.buffer = (char *) malloc (buf.allocated);
- X buf.fastmap = fastmap;
- X buf.translate = upcase;
- X
- X while (1)
- X {
- X gets (pat);
- X
- X if (*pat)
- X {
- X re_compile_pattern (pat, strlen(pat), &buf);
- X
- X for (i = 0; i < buf.used; i++)
- X printchar (buf.buffer[i]);
- X
- X putchar ('\n');
- X
- X printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
- X
- X re_compile_fastmap (&buf);
- X printf ("Allowed by fastmap: ");
- X for (i = 0; i < (1 << BYTEWIDTH); i++)
- X if (fastmap[i]) printchar (i);
- X putchar ('\n');
- X }
- X
- X gets (pat); /* Now read the string to match against */
- X
- X i = re_match (&buf, pat, strlen (pat), 0, 0);
- X printf ("Match value %d.\n", i);
- X }
- X}
- X
- X#ifdef NOTDEF
- Xprint_buf (bufp)
- X struct re_pattern_buffer *bufp;
- X{
- X int i;
- X
- X printf ("buf is :\n----------------\n");
- X for (i = 0; i < bufp->used; i++)
- X printchar (bufp->buffer[i]);
- X
- X printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
- X
- X printf ("Allowed by fastmap: ");
- X for (i = 0; i < (1 << BYTEWIDTH); i++)
- X if (bufp->fastmap[i])
- X printchar (i);
- X printf ("\nAllowed by translate: ");
- X if (bufp->translate)
- X for (i = 0; i < (1 << BYTEWIDTH); i++)
- X if (bufp->translate[i])
- X printchar (i);
- X printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
- X printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
- X}
- X#endif
- X
- Xprintchar (c)
- X char c;
- X{
- X if (c < 041 || c >= 0177)
- X {
- X putchar ('\\');
- X putchar (((c >> 6) & 3) + '0');
- X putchar (((c >> 3) & 7) + '0');
- X putchar ((c & 7) + '0');
- X }
- X else
- X putchar (c);
- X}
- X
- Xerror (string)
- X char *string;
- X{
- X puts (string);
- X exit (1);
- X}
- X
- X#endif /* test */
- SHAR_EOF
- echo "End of archive 3 (of 14)"
- # if you want to concatenate archives, remove anything after this line
- exit
-